Basic descriptives of overall activity
# PER INFLUENCER
# Add a PROFILE column holding the first path segment that follows ".com/"
# in URL (a URL that does not match the pattern is kept unchanged), then
# convert the result to a data.table for the summaries below.
tw <- as.data.table(
  mutate(tw, PROFILE = gsub("^.*\\.com/([^/]+).*", "\\1", URL))
)
# most active profiles: number of rows per PROFILE, largest count first;
# only the first 1000 rows are shown in the interactive table
tw[, .N, by = PROFILE][order(-N)] %>%
  head(1000) %>%
  datatable(options = list(scrollX = TRUE, scrollY = "500px"))
# most popular: mean FOLLOWERS_COUNT per profile, highest first (top 1000)
tw %>%
  group_by(PROFILE) %>%
  summarise(FOLLOW = mean(FOLLOWERS_COUNT)) %>%
  arrange(desc(FOLLOW)) %>%
  head(1000) %>%
  datatable(options = list(scrollX = TRUE, scrollY = "500px"))
# most influential: total REACH per profile, highest first (top 1000)
tw %>%
  group_by(PROFILE) %>%
  summarise(REACH = sum(REACH)) %>%
  arrange(desc(REACH)) %>%
  head(1000) %>%
  datatable(options = list(scrollX = TRUE, scrollY = "500px"))
# most influential II: total INTERACTIONS per profile, highest first
# (top 1000)
tw %>%
  group_by(PROFILE) %>%
  summarise(INTERACTIONS = sum(INTERACTIONS)) %>%
  arrange(desc(INTERACTIONS)) %>%
  head(1000) %>%
  datatable(options = list(scrollX = TRUE, scrollY = "500px"))
# most appreciated (favorites): total FAVORITE_COUNT per profile,
# highest first (top 1000)
tw %>%
  group_by(PROFILE) %>%
  summarise(FAVORITE = sum(FAVORITE_COUNT)) %>%
  arrange(desc(FAVORITE)) %>%
  head(1000) %>%
  datatable(options = list(scrollX = TRUE, scrollY = "500px"))
# most retweeted: total RETWEET_COUNT per profile, highest first (top 1000)
tw %>%
  group_by(PROFILE) %>%
  summarise(RETWEET = sum(RETWEET_COUNT)) %>%
  arrange(desc(RETWEET)) %>%
  head(1000) %>%
  datatable(options = list(scrollX = TRUE, scrollY = "500px"))
# PER TWEET
# most popular: individual rows ordered by FOLLOWERS_COUNT, highest first;
# the first 1000 are shown with profile, text, the metric and the URL
tw %>%
  arrange(desc(FOLLOWERS_COUNT)) %>%
  head(1000) %>%
  select(PROFILE, FULL_TEXT, FOLLOWERS_COUNT, URL) %>%
  datatable(options = list(scrollX = TRUE, scrollY = "500px"))
# most influential: individual rows ordered by REACH, highest first
# (top 1000)
tw %>%
  arrange(desc(REACH)) %>%
  head(1000) %>%
  select(PROFILE, FULL_TEXT, REACH, URL) %>%
  datatable(options = list(scrollX = TRUE, scrollY = "500px"))
# most influential II: individual rows ordered by INTERACTIONS, highest
# first (top 1000)
tw %>%
  arrange(desc(INTERACTIONS)) %>%
  head(1000) %>%
  select(PROFILE, FULL_TEXT, INTERACTIONS, URL) %>%
  datatable(options = list(scrollX = TRUE, scrollY = "500px"))
# most appreciated (favorites): individual rows ordered by FAVORITE_COUNT,
# highest first (top 1000)
tw %>%
  arrange(desc(FAVORITE_COUNT)) %>%
  head(1000) %>%
  select(PROFILE, FULL_TEXT, FAVORITE_COUNT, URL) %>%
  datatable(options = list(scrollX = TRUE, scrollY = "500px"))
# most retweeted: individual rows ordered by RETWEET_COUNT, highest first
# (top 1000)
tw %>%
  arrange(desc(RETWEET_COUNT)) %>%
  head(1000) %>%
  select(PROFILE, FULL_TEXT, RETWEET_COUNT, URL) %>%
  datatable(options = list(scrollX = TRUE, scrollY = "500px"))
Check forum activity
## word sentiment brija
## 1: gould 0.58639 POZ
## 2: izgovarati 0.56071 NEG
## 3: nebeski 0.59666 POZ
## 4: opus 0.36952 NEG
## 5: poslenik 0.27655 POZ
## 6: brodar 0.39643 POZ
## 7: schultz 0.14534 NEG
## 8: ingra 0.30493 NEG
## 9: pridavati 0.57727 POZ
## 10: kuta 0.60072 NEG
## 11: ekskluzivan 0.50823 NEG
## 12: toranj 0.50855 POZ
## 13: selektivnost 0.19562 NEG
## 14: kapetanica 0.32821 NEG
## 15: doći 0.39604 POZ
# convert the forum data to a data.table, then show the number of rows per
# TITLE, largest count first (top 1000)
forum <- as.data.table(forum)
forum[, .N, by = TITLE][order(-N)] %>%
  head(1000) %>%
  datatable(options = list(scrollX = TRUE, scrollY = "500px"))
# keep only the rows of one thread (matched by its exact TITLE) and split
# FULL_TEXT into one token per row, stored in column `word`
ZM_token <- unnest_tokens(
  forum[TITLE == "Zoran Milanović, predsjednik Republike Hrvatske vol. IV"],
  word,
  FULL_TEXT
)
# Clean the token table:
#   * drop tokens listed in stop_corpus (matched on `word`)
#   * drop missing tokens
#   * drop tokens that contain at least one digit
#   * drop tokens that consist of a single letter
# The single-letter test uses the Unicode letter class \p{L} rather than
# [a-zA-Z], so lone letters carrying diacritics (e.g. "š", "ž", "č") are
# removed as well instead of slipping through an ASCII-only pattern.
ZM_tokenTidy <- ZM_token %>%
  anti_join(stop_corpus, by = "word") %>%
  filter(
    !is.na(word),
    !grepl("\\d", word),
    !grepl("^\\p{L}$", word, perl = TRUE)
  )
# occurrences of each remaining token, most frequent first
ZM_tokenTidy[, .N, by = word][order(-N)]
## word N
## 1: quote 3264
## 2: milanović 1643
## 3: hdz 1274
## 4: onda 838
## 5: predsjednik 811
## ---
## 38457: sišao 1
## 38458: oblacima 1
## 38459: smotre 1
## 38460: rasipanje 1
## 38461: hašomana 1
## Visualize most common words
# Tokens occurring more than 500 times; `word` becomes a factor whose
# levels are ordered by N so the bars are sorted, and coord_flip() draws
# them horizontally.
ZM_tokenTidy[, .N, by = word][N > 500][order(-N)] %>%
  mutate(word = reorder(word, N)) %>%
  ggplot(aes(x = word, y = N)) +
  geom_col() +
  xlab(NULL) +
  coord_flip() +
  theme_economist()
